5101 weekly
5206 3.29 3.21
5094 4.12
5145 4.14

import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
%matplotlib inline
# Load the survey responses into a DataFrame and preview the first rows.
responses_csv = "multipleChoiceResponses.csv"
res = pd.read_csv(responses_csv)
res.head()

# Box plot of the 'Age' column: the mean is drawn as a line and the fliers
# are rendered as red circles.
flier_style = {'marker': 'o', 'markerfacecolor': 'red', 'color': 'black'}
res.boxplot(
    column='Age',
    showmeans=True,
    meanline=True,
    flierprops=flier_style,
)
plt.ylabel('Age')  # y-axis label
plt.title('Age of participants')  # figure title
plt.show()

# Summary statistics of the 'Age' column: min, max, mean and the three
# quartiles. q1..q6 hold the raw values, q11..q66 the rounded ones.
_age = res['Age']
q1, q2, q3 = _age.min(), _age.max(), _age.mean()
q4, q5, q6 = (_age.quantile(_p) for _p in (0.25, 0.5, 0.75))
q11, q22, q33, q44, q55, q66 = (round(_v) for _v in (q1, q2, q3, q4, q5, q6))

# Report each rounded statistic with its description.
_report = (
    ("The minimal value of age is", q11),
    ("The maximal value of age is", q22),
    ("The mean value of age is", q33),
    ("The one forth value of age is", q44),
    ("The median of age is", q55),
    ("The three forth value of age is", q66),
)
for _message, _value in _report:
    print(_message, _value)

The minimum age is 20,
The maximum age is 78,
The mean age is 34,
The first quartile (25th percentile) of age is 27,
The median age is 32,
The third quartile (75th percentile) of age is 39.

First, the age span is wide: the minimum is 20 and the maximum is 78. According to the boxplot, ages above about 58 are considered outliers. The median age is close to the mean age. Most participants are relatively young, but older people still make up a sizeable share of all participants.

# Count, for every job title, how many respondents fall into each age band.
# The bands are passed as a list of (column name, function) pairs: the
# nested-dict renaming form of ``agg`` was deprecated and then removed in
# pandas 1.0 (it raises SpecificationError there).
age_bands = [
    ('over 65', lambda ages: (ages > 65).sum()),
    ('under 18', lambda ages: (ages < 18).sum()),
    # Strict bounds 17 < age < 66, as in the original element-wise check.
    ('between 18 and 65', lambda ages: ((ages > 17) & (ages < 66)).sum()),
]
# Aggregating the 'Age' series directly yields flat column names, so no
# droplevel/rename step is needed after reset_index().
groupbyClass = (
    res.groupby('CurrentJobTitleSelect')['Age']
    .agg(age_bands)
    .reset_index()
)
groupbyClass

# Number of respondents per gender, as a two-column frame
# ('GenderSelect', 'number'). ``size`` replaces the nested-dict ``agg`` spec,
# which pandas >= 1.0 rejects with SpecificationError.
group1 = res.groupby('GenderSelect').size().reset_index(name='number')
group1  # notebook display of the per-gender counts

# Take the bar heights and tick labels from the grouped data instead of
# hard-coding them, so the chart always matches the loaded file.
genders = group1['GenderSelect'].tolist()
number = group1['number'].tolist()
positions = range(len(number))

plt.bar(positions, number, 0.4, alpha=0.8)  # draw the bar chart
plt.ylabel('Number')  # y-axis label
plt.title('Gender of participants')  # figure title
plt.xticks(positions, genders)  # one tick label per gender
for x, y in enumerate(number):
    # Write each count just above its bar.
    plt.text(x, y + 1, '%s' % y, ha='center')
plt.show()

# Share of male and female respondents, from the counts shown in the gender
# bar chart (3008 male, 496 female, 36 of a different identity).
Male = 3008
Female = 496
Total = Male + Female + 36

# Fractions of the total (0..1), raw and rounded to two decimals.
p1 = Male / Total
p11 = round(p1, 2)
p2 = Female / Total
p22 = round(p2, 2)

# The fractions are scaled by 100 before printing: the original printed the
# 0..1 fraction next to a "%" sign (e.g. "0.85 %"), understating the share
# by a factor of 100.
male_pct = round(p1 * 100, 1)
female_pct = round(p2 * 100, 1)

print(male_pct, "% respondants are men")
print(female_pct, "% respondants are women")

# One 'Age' box plot per value of 'GenderSelect'.
res.boxplot(by='GenderSelect', column='Age')
plt.show()

# Bar chart of the countries with the most respondents (at most five).
top_country = res.Country.value_counts()  # sorted by count, descending
top_five = top_country[:5]
positions = range(len(top_five))  # also correct when fewer than 5 countries exist

plt.bar(positions, top_five, 0.5, alpha=1)
plt.ylabel('Number')  # y-axis label
plt.title('Top 5 country of participants')  # figure title
for x, y in enumerate(top_five):
    # Write each count just above its bar.
    plt.text(x, y + 1, '%s' % y, ha='center')
# Label the bars from the index of the counted data; a hard-coded name list
# would silently mislabel bars whenever the ranking differs from it.
plt.xticks(positions, top_five.index)
plt.show()

# Pie chart of the share of respondents per 'GenderSelect' value.
gender_count = res.groupby('GenderSelect').size()
label, size = gender_count.index, gender_count.values
pie_options = {
    'labels': label,
    'autopct': '%1.1f%%',  # percentage with one decimal on each wedge
    'shadow': True,
    'startangle': 90,
}
plt.pie(size, **pie_options)
plt.legend()
plt.axis('equal')  # equal aspect ratio so the pie is drawn as a circle
plt.show()

# Rows of res1 whose country is Australia, the United Kingdom or the United
# States. ``isin`` is a method and must be called with the list; the original
# subscripted it (``isin[...]``), which raises TypeError.
# NOTE(review): the filtered frame is not stored, so the aggregation below
# still runs on all of res1 — confirm whether it was meant to use the filter.
res1.loc[res1['Country'].isin(['Australia', 'United Kingdom', 'United States'])]

# Count the non-null 'Country' entries for each employment status.
fun1 = {'Country': 'count'}
res1.groupby('EmploymentStatus').agg(fun1)

# Horizontal bar chart of 'Average salary' against 'Country', drawn large
# (20x20 inches, font size 20) and without a legend.
plot = groupbyCountry.plot(
    x='Country',
    y='Average salary',
    kind='barh',
    title='Average salary of all countries',
    stacked=True,
    figsize=(20, 20),
    fontsize=20,
    legend=None,
)


# Pie chart of the number of rows per 'Brand' in the shoes data.
brand = shoes.groupby('Brand').size()
label, size = brand.index, brand.values
plt.pie(
    size,
    labels=label,
    autopct='%1.1f%%',  # percentage with one decimal on each wedge
    shadow=True,
    startangle=90,
    labeldistance=2,
    radius=100,
)
# Anchor the legend outside the axes, to the right of the pie.
plt.legend(loc="upper right", bbox_to_anchor=(2.1, 1.05))
plt.axis('equal')
plt.show()








# Bar chart of the first five entries of top_location.
# NOTE(review): the tick labels are hard-coded; presumably they match the
# order of top_location — verify against the data.
leading_five = top_location[:5]
city_names = ['Shanghai', 'Beijing', 'Guangzhou', 'Ningbo', 'Zibo']

plt.bar(range(5), leading_five, 0.5, alpha=1)
plt.title('Top 5 city of consumer')  # figure title
plt.ylabel('Number')  # y-axis label
for x, y in enumerate(leading_five):
    # Write each value just above its bar.
    plt.text(x, y + 1, '%s' % y, ha='center')
plt.xticks(range(5), city_names)

import datetime

# Round-trip every DealTime value through strptime/strftime with the
# '%Y/%m' pattern, so all values are rendered in the same year/month form.
shoes['DealTime'] = shoes['DealTime'].astype(str).apply(
    lambda time: datetime.datetime.strptime(time, '%Y/%m').strftime('%Y/%m')
)

# Scatter plot of brand against deal month.
plt.figure(figsize=(50, 50))
plt.scatter(shoes['DealTime'], shoes['Brand'])
plt.xticks(rotation=90)  # vertical tick labels so the months do not overlap
# The title now names the plotted columns; the previous 'Salary & Age' did
# not match this DealTime-vs-Brand chart.
plt.title('Brand & DealTime', fontsize=20)
plt.show()

# Mean and median age per (country, gender), restricted to five countries.
# The aggregations are named by string: passing np.mean / np.median to
# ``agg`` is deprecated in recent pandas, and the string aliases produce the
# same 'mean' and 'median' columns.
n = (
    res.groupby(['Country', 'GenderSelect'])['Age']
    .agg(['mean', 'median'])
    .loc[['United States', 'United Kingdom', 'Germany', 'Australia', 'India'], :]
)